1 Import data

# Column names, in file order, as listed in the `abalone.names` information file.
cols <- c(
  "sex",
  "length",
  "diam",
  "height",
  "weight_whole",
  "weight_shucked",
  "weight_viscera",
  "weight_shell",
  "rings"
)

# Read `abalone.data` from the UCI repository. The names are passed as
# `col_names`, so the first line of the file is read as data, not as a header.
data_url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data"
raw_data <- readr::read_csv(data_url, col_names = cols)

2 Clean data

# Store `sex` as a factor with the explicit level order M, F, I, so that M is
# the reference level in the regressions below.
data <- mutate(raw_data, sex = factor(sex, levels = c("M", "F", "I")))

3 Initial exploration

3.1 Fit straight line

# Scatter plot of rings against each numeric predictor with its least-squares
# line. The seven plots differ only in the x variable, so they are generated
# from a single template instead of seven copies.
predictors <- c(
  "length", "diam", "height", "weight_whole",
  "weight_shucked", "weight_viscera", "weight_shell"
)
fit_plots <- lapply(predictors, function(x_var) {
  ggplot(data, aes(x = .data[[x_var]], y = rings)) +
    geom_point() + theme_classic(base_size = 5) +
    labs(x = x_var, y = "rings") +
    geom_smooth(method = "lm", se = FALSE)
})

# Keep the individual names: later chunks pair each p<i> with its residual plot.
p1 <- fit_plots[[1]]
p2 <- fit_plots[[2]]
p3 <- fit_plots[[3]]
p4 <- fit_plots[[4]]
p5 <- fit_plots[[5]]
p6 <- fit_plots[[6]]
p7 <- fit_plots[[7]]

grid.arrange(p1, p2, p3, p4, p5, p6, p7, nrow = 3)

3.2 Compare values and Residuals to check for Linearity

3.2.1 rings vs length

# Simple linear regression of rings on length.
lm1 <- lm(rings ~ length, data = data)
lm1
## 
## Call:
## lm(formula = rings ~ length, data = data)
## 
## Coefficients:
## (Intercept)       length  
##       2.102       14.946
# Copy of the data with lm1's residuals and fitted values attached.
data1 <- mutate(data, resid1 = residuals(lm1), fitted1 = fitted(lm1))

# Residuals against the predictor, with a red zero line and a loess trend.
q1 <- ggplot(data1, aes(x = length, y = resid1)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "length", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p1) next to the residual plot (q1).
grid.arrange(p1, q1, nrow = 1)

3.2.2 rings vs diam

# Simple linear regression of rings on diameter.
lm2 <- lm(rings ~ diam, data = data)
lm2
## 
## Call:
## lm(formula = rings ~ diam, data = data)
## 
## Coefficients:
## (Intercept)         diam  
##       2.319       18.670
# Copy of the data with lm2's residuals and fitted values attached.
data2 <- mutate(data, resid2 = residuals(lm2), fitted2 = fitted(lm2))

# Residuals against the predictor, with a red zero line and a loess trend.
q2 <- ggplot(data2, aes(x = diam, y = resid2)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "diam", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p2) next to the residual plot (q2).
grid.arrange(p2, q2, nrow = 1)

3.2.3 rings vs height

# Drop the rows with height >= 0.5 before fitting.
# NOTE(review): this overwrites `data`, so every later chunk also works on the
# filtered rows, while lm1 and lm2 above were fitted before the filter.
data <- data %>% filter(height < 0.5)

# p3 was drawn before the filter; rebuild it so the left panel below shows the
# same rows (and the same fitted line) as the model and residual plot.
p3 <- ggplot(data, aes(x = height, y = rings)) +
  geom_point() + theme_classic(base_size = 5) +
  labs(x = "height", y = "rings") +
  geom_smooth(method = "lm", se = FALSE)

# Simple linear regression of rings on height.
lm3 <- lm(rings ~ height, data = data)
lm3
## 
## Call:
## lm(formula = rings ~ height, data = data)
## 
## Coefficients:
## (Intercept)       height  
##       2.825       51.078
# Copy of the data with lm3's residuals and fitted values attached.
data3 <- data %>%
  mutate(
    resid3 = residuals(lm3),
    fitted3 = fitted(lm3)
  )

# Residuals against the predictor, with a red zero line and a loess trend.
q3 <- ggplot(data3, aes(x = height, y = resid3)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "height", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

grid.arrange(p3, q3, nrow = 1)

3.2.4 rings vs weight_whole

# Simple linear regression of rings on whole weight.
lm4 <- lm(rings ~ weight_whole, data = data)
lm4
## 
## Call:
## lm(formula = rings ~ weight_whole, data = data)
## 
## Coefficients:
##  (Intercept)  weight_whole  
##        6.985         3.559
# Copy of the data with lm4's residuals and fitted values attached.
data4 <- mutate(data, resid4 = residuals(lm4), fitted4 = fitted(lm4))

# Residuals against the predictor, with a red zero line and a loess trend.
q4 <- ggplot(data4, aes(x = weight_whole, y = resid4)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "weight_whole", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p4) next to the residual plot (q4).
grid.arrange(p4, q4, nrow = 1)

3.2.5 rings vs weight_shucked

# Simple linear regression of rings on shucked weight.
lm5 <- lm(rings ~ weight_shucked, data = data)
lm5
## 
## Call:
## lm(formula = rings ~ weight_shucked, data = data)
## 
## Coefficients:
##    (Intercept)  weight_shucked  
##          7.732           6.130
# Residuals and fitted values are taken from the model object. The former
# hand-typed `fitted5`/`resid5` vectors used coefficients that no longer
# matched the fit and were never read, so they were removed.
data5 <- data %>%
  mutate(
    resid5 = residuals(lm5),
    fitted5 = fitted(lm5)
  )

# Residuals against the predictor, with a red zero line and a loess trend.
q5 <- ggplot(data5, aes(x = weight_shucked, y = resid5)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "weight_shucked", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

grid.arrange(p5, q5, nrow = 1)

3.2.6 rings vs weight_viscera

# Simple linear regression of rings on viscera weight.
lm6 <- lm(rings ~ weight_viscera, data = data)
lm6
## 
## Call:
## lm(formula = rings ~ weight_viscera, data = data)
## 
## Coefficients:
##    (Intercept)  weight_viscera  
##          7.254          14.845
# Copy of the data with lm6's residuals and fitted values attached.
data6 <- mutate(data, resid6 = residuals(lm6), fitted6 = fitted(lm6))

# Residuals against the predictor, with a red zero line and a loess trend.
q6 <- ggplot(data6, aes(x = weight_viscera, y = resid6)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "weight_viscera", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p6) next to the residual plot (q6).
grid.arrange(p6, q6, nrow = 1)

3.2.7 rings vs weight_shell

# Simple linear regression of rings on shell weight.
lm7 <- lm(rings ~ weight_shell, data = data)
lm7
## 
## Call:
## lm(formula = rings ~ weight_shell, data = data)
## 
## Coefficients:
##  (Intercept)  weight_shell  
##         6.46         14.55
# Residuals and fitted values are taken from the model object. The former
# hand-typed `fitted7`/`resid7` vectors were never read and duplicated the
# coefficients by hand, so they were removed.
data7 <- data %>%
  mutate(
    resid7 = residuals(lm7),
    fitted7 = fitted(lm7)
  )

# Residuals against the predictor, with a red zero line and a loess trend.
q7 <- ggplot(data7, aes(x = weight_shell, y = resid7)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "weight_shell", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

grid.arrange(p7, q7, nrow = 1)

3.3 Check normality

library(ggfortify)
# Diagnostic plots 1 and 2 for each single-predictor fit of rings (lm1..lm7
# from section 3.2).
# NOTE(review): in ggfortify these are presumably residuals-vs-fitted and the
# normal Q-Q plot - confirm against the ggfortify documentation.
autoplot(lm1, which = 1:2)
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

autoplot(lm2, which = 1:2)

autoplot(lm3, which = 1:2)

autoplot(lm4, which = 1:2)

autoplot(lm5, which = 1:2)

autoplot(lm6, which = 1:2)

autoplot(lm7, which = 1:2)

3.4 Use GGally

# install.packages("GGally")
library(GGally)
# Panel function for the lower triangle of ggpairs(): points plus a red loess
# curve without a confidence band. `...` is accepted but not used.
# (Swap in `method = lm` with blue colours to draw a straight line instead.)
my_fn <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) +
    geom_point() +
    geom_smooth(method = loess, fill = "red", color = "red", se = FALSE)
}

# Pairs plot of the first nine columns, using my_fn for continuous pairs.
pairs_plot <- GGally::ggpairs(
  data,
  columns = 1:9,
  lower = list(continuous = my_fn)
)
pairs_plot + theme_bw(base_size = 10)

# Full additive model: rings on every other column of `data`, followed by the
# coefficient table rounded to four decimals.
lmTotal <- lm(rings ~ ., data = data)
round(summary(lmTotal)$coefficients, 4)
##                Estimate Std. Error  t value Pr(>|t|)
## (Intercept)      3.6226     0.2866  12.6401   0.0000
## sexF            -0.0559     0.0828  -0.6742   0.5002
## sexI            -0.8449     0.0953  -8.8683   0.0000
## length          -1.1593     1.8007  -0.6438   0.5198
## diam             9.2492     2.2270   4.1532   0.0000
## height          23.2520     2.2763  10.2149   0.0000
## weight_whole     8.8537     0.7210  12.2791   0.0000
## weight_shucked -19.3888     0.8142 -23.8131   0.0000
## weight_viscera -11.1209     1.2878  -8.6358   0.0000
## weight_shell     7.6898     1.1267   6.8249   0.0000
lmTotal
## 
## Call:
## lm(formula = rings ~ ., data = data)
## 
## Coefficients:
##    (Intercept)            sexF            sexI          length            diam  
##        3.62262        -0.05586        -0.84490        -1.15926         9.24922  
##         height    weight_whole  weight_shucked  weight_viscera    weight_shell  
##       23.25204         8.85371       -19.38877       -11.12089         7.68979
# remotes::install_github("datalorax/equatiomatic")
library(equatiomatic)
# Print the fitted model as a LaTeX equation with the estimated coefficients
# substituted in (use_coefs = TRUE).
extract_eq(lmTotal, use_coefs = TRUE)
## $$
## \operatorname{rings} = 3.62 - 0.06(\operatorname{sex}_{\operatorname{F}}) - 0.84(\operatorname{sex}_{\operatorname{I}}) - 1.16(\operatorname{length}) + 9.25(\operatorname{diam}) + 23.25(\operatorname{height}) + 8.85(\operatorname{weight\_whole}) - 19.39(\operatorname{weight\_shucked}) - 11.12(\operatorname{weight\_viscera}) + 7.69(\operatorname{weight\_shell}) + \epsilon
## $$

4 Log transformation on rings (log - linear)

4.1 Fit straight line

# Add log(rings) as a new column; the original `rings` column is kept.
data <- data %>%
  mutate(lorings = log(rings))

# Scatter plot of log(rings) against each numeric predictor with its
# least-squares line, generated from one template instead of seven copies.
predictors <- c(
  "length", "diam", "height", "weight_whole",
  "weight_shucked", "weight_viscera", "weight_shell"
)
fit_plots <- lapply(predictors, function(x_var) {
  ggplot(data, aes(x = .data[[x_var]], y = lorings)) +
    geom_point() + theme_classic(base_size = 5) +
    labs(x = x_var, y = "lorings") +
    geom_smooth(method = "lm", se = FALSE)
})

# Keep the individual names: later chunks pair each p<i> with its residual plot.
p1 <- fit_plots[[1]]
p2 <- fit_plots[[2]]
p3 <- fit_plots[[3]]
p4 <- fit_plots[[4]]
p5 <- fit_plots[[5]]
p6 <- fit_plots[[6]]
p7 <- fit_plots[[7]]

grid.arrange(p1, p2, p3, p4, p5, p6, p7, nrow = 3)

4.2 Compare values and Residuals to check for Linearity

4.2.1 log(rings) vs length

# Log-linear fit: log(rings) on length.
lm1 <- lm(lorings ~ length, data = data)
# Copy of the data with lm1's residuals and fitted values attached.
data1 <- mutate(data, resid1 = residuals(lm1), fitted1 = fitted(lm1))

# Residuals against the predictor, with a red zero line and a loess trend.
q1 <- ggplot(data1, aes(x = length, y = resid1)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "length", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p1) next to the residual plot (q1).
grid.arrange(p1, q1, nrow = 1)

4.2.2 log(rings) vs diam

# Log-linear fit: log(rings) on diameter.
lm2 <- lm(lorings ~ diam, data = data)
# Copy of the data with lm2's residuals and fitted values attached.
data2 <- mutate(data, resid2 = residuals(lm2), fitted2 = fitted(lm2))

# Residuals against the predictor, with a red zero line and a loess trend.
q2 <- ggplot(data2, aes(x = diam, y = resid2)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "diam", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p2) next to the residual plot (q2).
grid.arrange(p2, q2, nrow = 1)

4.2.3 log(rings) vs height

# Log-linear fit: log(rings) on height.
lm3 <- lm(lorings ~ height, data = data)
# Copy of the data with lm3's residuals and fitted values attached.
data3 <- mutate(data, resid3 = residuals(lm3), fitted3 = fitted(lm3))

# Residuals against the predictor, with a red zero line and a loess trend.
q3 <- ggplot(data3, aes(x = height, y = resid3)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "height", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p3) next to the residual plot (q3).
grid.arrange(p3, q3, nrow = 1)

4.2.4 log(rings) vs weight_whole

# Log-linear fit: log(rings) on whole weight.
lm4 <- lm(lorings ~ weight_whole, data = data)
# Copy of the data with lm4's residuals and fitted values attached.
data4 <- mutate(data, resid4 = residuals(lm4), fitted4 = fitted(lm4))

# Residuals against the predictor, with a red zero line and a loess trend.
q4 <- ggplot(data4, aes(x = weight_whole, y = resid4)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "weight_whole", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p4) next to the residual plot (q4).
grid.arrange(p4, q4, nrow = 1)

4.2.5 log(rings) vs weight_shucked

# Log-linear fit: log(rings) on shucked weight.
lm5 <- lm(lorings ~ weight_shucked, data = data)
# Copy of the data with lm5's residuals and fitted values attached.
data5 <- mutate(data, resid5 = residuals(lm5), fitted5 = fitted(lm5))

# Residuals against the predictor, with a red zero line and a loess trend.
q5 <- ggplot(data5, aes(x = weight_shucked, y = resid5)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "weight_shucked", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p5) next to the residual plot (q5).
grid.arrange(p5, q5, nrow = 1)

4.2.6 log(rings) vs weight_viscera

# Log-linear fit: log(rings) on viscera weight.
lm6 <- lm(lorings ~ weight_viscera, data = data)
# Copy of the data with lm6's residuals and fitted values attached.
data6 <- mutate(data, resid6 = residuals(lm6), fitted6 = fitted(lm6))

# Residuals against the predictor, with a red zero line and a loess trend.
q6 <- ggplot(data6, aes(x = weight_viscera, y = resid6)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "weight_viscera", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p6) next to the residual plot (q6).
grid.arrange(p6, q6, nrow = 1)

4.2.7 log(rings) vs weight_shell

# Log-linear fit: log(rings) on shell weight.
lm7 <- lm(lorings ~ weight_shell, data = data)
# Copy of the data with lm7's residuals and fitted values attached.
data7 <- mutate(data, resid7 = residuals(lm7), fitted7 = fitted(lm7))

# Residuals against the predictor, with a red zero line and a loess trend.
q7 <- ggplot(data7, aes(x = weight_shell, y = resid7)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "weight_shell", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p7) next to the residual plot (q7).
grid.arrange(p7, q7, nrow = 1)

4.3 Check normality

library(ggfortify)
# Diagnostic plots 1 and 2 for each log(rings) fit from section 4.2 (lm1..lm7).
# The commented-out blocks are a manual alternative that draws the Q-Q plot
# from the residual columns with geom_qq() / geom_qq_line().
autoplot(lm1, which = 1:2)

# data2 %>% ggplot() +
#   aes(sample = resid2) +
#   geom_qq(size = 2) + geom_qq_line()

autoplot(lm2, which = 1:2)

# data3 %>% ggplot() +
#   aes(sample = resid3) +
#   geom_qq(size = 2) + geom_qq_line()
autoplot(lm3, which = 1:2)

# data4 %>% ggplot() +
#   aes(sample = resid4) +
#   geom_qq(size = 2) + geom_qq_line()
autoplot(lm4, which = 1:2)

# data5 %>% ggplot() +
#   aes(sample = resid5) +
#   geom_qq(size = 2) + geom_qq_line()
autoplot(lm5, which = 1:2)

# data6 %>% ggplot() +
#   aes(sample = resid6) +
#   geom_qq(size = 2) + geom_qq_line()
autoplot(lm6, which = 1:2)

autoplot(lm7, which = 1:2)

# install.packages("GGally")
library(GGally)
# Panel function for the lower triangle of ggpairs(): points plus a red loess
# curve without a confidence band. `...` is accepted but not used.
# (Swap in `method = lm` with blue colours to draw a straight line instead.)
my_fn <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) +
    geom_point() +
    geom_smooth(method = loess, fill = "red", color = "red", se = FALSE)
}

# Columns 1-8 are sex and the seven measurements; column 10 is lorings.
# Selecting 1:9 showed the untransformed rings (column 9) and so reproduced the
# pairs plot of section 3.4 instead of the log-transformed response.
GGally::ggpairs(
  data,
  columns = c(1:8, 10),
  lower = list(continuous = my_fn)
) + theme_bw(base_size = 10)

# Multiple regression for the log-linear section: log(rings) on sex and the
# seven measurements. `rings` is removed from the right-hand side; the former
# `rings ~ .` used lorings = log(rings) as a predictor of rings itself, so the
# response leaked into the model.
lmTotal <- lm(lorings ~ . - rings, data = data)
summary(lmTotal)$coefficients %>% round(4)
## NOTE: output removed - the table and equation printed here came from the
## earlier `rings ~ .` fit that included lorings. Re-knit to regenerate them.
lmTotal
# Fitted model as a LaTeX equation with the estimated coefficients.
extract_eq(lmTotal, use_coefs = TRUE)

5 Linear Log

5.1 Fit straight line

# Replace each numeric predictor by its natural log, then drop rows whose
# logged height is not finite (log(0) is -Inf).
# NOTE: the columns are overwritten in place, so this chunk must run exactly
# once; running it again would take the log of the logs.
data <- data %>%
  mutate(
    length = log(length),
    diam = log(diam),
    height = log(height),
    weight_whole = log(weight_whole),
    weight_shucked = log(weight_shucked),
    weight_viscera = log(weight_viscera),
    weight_shell = log(weight_shell)
  ) %>%
  filter(is.finite(height))

# Scatter plot of rings against each logged predictor with its least-squares
# line. Names are the columns; values are the axis labels ("l" = logged).
axis_labels <- c(
  length = "llength",
  diam = "ldiam",
  height = "lheight",
  weight_whole = "lweight_whole",
  weight_shucked = "lweight_shucked",
  weight_viscera = "lweight_viscera",
  weight_shell = "lweight_shell"
)
fit_plots <- lapply(names(axis_labels), function(x_var) {
  ggplot(data, aes(x = .data[[x_var]], y = rings)) +
    geom_point() + theme_classic(base_size = 5) +
    labs(x = axis_labels[[x_var]], y = "rings") +
    geom_smooth(method = "lm", se = FALSE)
})

# Keep the individual names: later chunks pair each p<i> with its residual plot.
p1 <- fit_plots[[1]]
p2 <- fit_plots[[2]]
p3 <- fit_plots[[3]]
p4 <- fit_plots[[4]]
p5 <- fit_plots[[5]]
p6 <- fit_plots[[6]]
p7 <- fit_plots[[7]]

grid.arrange(p1, p2, p3, p4, p5, p6, p7, nrow = 3)

5.2 Compare values and Residuals to check for Linearity

5.2.1 rings vs log length

# Linear-log fit: rings on log(length). The `length` column already holds the
# logged values at this point.
lm1 <- lm(rings ~ length, data = data)
lm1
## 
## Call:
## lm(formula = rings ~ length, data = data)
## 
## Coefficients:
## (Intercept)       length  
##      14.446        6.648
# Copy of the data with lm1's residuals and fitted values attached.
data1 <- data %>%
  mutate(
    resid1 = residuals(lm1),
    fitted1 = fitted(lm1)
  )

# Residuals against the logged predictor. The axis is labelled "llength" to
# match p1 and the other residual plots of this section.
q1 <- ggplot(data1, aes(x = length, y = resid1)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "llength", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

grid.arrange(p1, q1, nrow = 1)

5.2.2 rings vs log height

# Linear-log fit: rings on log(height). The response used to be `lorings`,
# which reproduced the log-log model of section 6 instead of the rings model
# that this section and p3 describe.
lm3 <- lm(rings ~ height, data = data)
lm3
## NOTE: output removed - the coefficients printed here belonged to the
## `lorings ~ height` fit. Re-knit to regenerate them.
# Copy of the data with lm3's residuals and fitted values attached.
data3 <- data %>%
  mutate(
    resid3 = residuals(lm3),
    fitted3 = fitted(lm3)
  )

# Residuals against the logged predictor, with a zero line and a loess trend.
q3 <- ggplot(data3, aes(x = height, y = resid3)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "lheight", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

grid.arrange(p3, q3, nrow = 1)

5.2.3 rings vs log weight_whole

# Linear-log fit: rings on log(weight_whole). The response used to be
# `lorings`, which reproduced the log-log model of section 6 instead of the
# rings model that this section and p4 describe.
lm4 <- lm(rings ~ weight_whole, data = data)
lm4
## NOTE: output removed - the coefficients printed here belonged to the
## `lorings ~ weight_whole` fit. Re-knit to regenerate them.
# Copy of the data with lm4's residuals and fitted values attached.
data4 <- data %>%
  mutate(
    resid4 = residuals(lm4),
    fitted4 = fitted(lm4)
  )

# Residuals against the logged predictor, with a zero line and a loess trend.
q4 <- ggplot(data4, aes(x = weight_whole, y = resid4)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "lweight_whole", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

grid.arrange(p4, q4, nrow = 1)

5.2.4 rings vs log weight_shucked

# Linear-log fit: rings on log(weight_shucked). The response used to be
# `lorings`, which reproduced the log-log model of section 6 instead of the
# rings model that this section and p5 describe.
lm5 <- lm(rings ~ weight_shucked, data = data)
lm5
## NOTE: output removed - the coefficients printed here belonged to the
## `lorings ~ weight_shucked` fit. Re-knit to regenerate them.
# Copy of the data with lm5's residuals and fitted values attached.
data5 <- data %>%
  mutate(
    resid5 = residuals(lm5),
    fitted5 = fitted(lm5)
  )

# Residuals against the logged predictor, with a zero line and a loess trend.
q5 <- ggplot(data5, aes(x = weight_shucked, y = resid5)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "lweight_shucked", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

grid.arrange(p5, q5, nrow = 1)

5.2.5 rings vs log weight_viscera

# Linear-log fit: rings on log(weight_viscera). The response used to be
# `lorings`, which reproduced the log-log model of section 6 instead of the
# rings model that this section and p6 describe.
lm6 <- lm(rings ~ weight_viscera, data = data)
lm6
## NOTE: output removed - the coefficients printed here belonged to the
## `lorings ~ weight_viscera` fit. Re-knit to regenerate them.
# Copy of the data with lm6's residuals and fitted values attached.
data6 <- data %>%
  mutate(
    resid6 = residuals(lm6),
    fitted6 = fitted(lm6)
  )

# Residuals against the logged predictor, with a zero line and a loess trend.
q6 <- ggplot(data6, aes(x = weight_viscera, y = resid6)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "lweight_viscera", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

grid.arrange(p6, q6, nrow = 1)

5.2.6 rings vs log weight_shell

# Linear-log fit: rings on log(weight_shell). The response used to be
# `lorings`, which reproduced the log-log model of section 6 instead of the
# rings model that this section and p7 describe.
lm7 <- lm(rings ~ weight_shell, data = data)
lm7
## NOTE: output removed - the coefficients printed here belonged to the
## `lorings ~ weight_shell` fit. Re-knit to regenerate them.
# Copy of the data with lm7's residuals and fitted values attached.
data7 <- data %>%
  mutate(
    resid7 = residuals(lm7),
    fitted7 = fitted(lm7)
  )

# Residuals against the logged predictor, with a zero line and a loess trend.
q7 <- ggplot(data7, aes(x = weight_shell, y = resid7)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "lweight_shell", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

grid.arrange(p7, q7, nrow = 1)

5.3 Check normality

library(ggfortify)
# Diagnostic plots 1 and 2 for each single-predictor fit of this section.
autoplot(lm1, which = 1:2)

# Section 5.2 has no diameter chunk, so `lm2` would still be the
# lorings ~ diam model fitted in section 4 on the unlogged predictor. Fit the
# linear-log diameter model here so the diagnostics belong to this section.
lm2 <- lm(rings ~ diam, data = data)
autoplot(lm2, which = 1:2)

autoplot(lm3, which = 1:2)

autoplot(lm4, which = 1:2)

autoplot(lm5, which = 1:2)

autoplot(lm6, which = 1:2)

autoplot(lm7, which = 1:2)

5.4 Use GGally

# install.packages("GGally")
library(GGally)
# Panel function for the lower triangle of ggpairs(): points plus a red loess
# curve without a confidence band. `...` is accepted but not used.
# (Swap in `method = lm` with blue colours to draw a straight line instead.)
my_fn <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) +
    geom_point() +
    geom_smooth(method = loess, fill = "red", color = "red", se = FALSE)
}

# Pairs plot of the first nine columns, using my_fn for continuous pairs.
pairs_plot <- GGally::ggpairs(
  data,
  columns = 1:9,
  lower = list(continuous = my_fn)
)
pairs_plot + theme_bw(base_size = 10)

# Multiple regression for the linear-log section: rings on sex and the seven
# logged measurements. `lorings` is removed from the right-hand side; it is
# log(rings), so leaving it in used the response to predict itself.
lmTotal <- lm(rings ~ . - lorings, data = data)
summary(lmTotal)$coefficients %>% round(4)
## NOTE: output removed - the table and equation printed here came from the
## earlier fit that included lorings as a predictor. Re-knit to regenerate them.
lmTotal
# Fitted model as a LaTeX equation with the estimated coefficients.
extract_eq(lmTotal, use_coefs = TRUE)

6 Log Log

6.1 Fit straight line

# Overwrite `rings` with its natural log. From here on `rings` and the
# existing `lorings` column hold the same values.
# NOTE: this chunk must run exactly once; running it again would log twice.
data <- data %>%
  mutate(rings = log(rings))

# Scatter plot of log(rings) against each logged predictor with its
# least-squares line. Names are the columns; values are the axis labels.
axis_labels <- c(
  length = "llength",
  diam = "ldiam",
  height = "lheight",
  weight_whole = "lweight_whole",
  weight_shucked = "lweight_shucked",
  weight_viscera = "lweight_viscera",
  weight_shell = "lweight_shell"
)
fit_plots <- lapply(names(axis_labels), function(x_var) {
  ggplot(data, aes(x = .data[[x_var]], y = rings)) +
    geom_point() + theme_classic(base_size = 5) +
    labs(x = axis_labels[[x_var]], y = "lorings") +
    geom_smooth(method = "lm", se = FALSE)
})

# Keep the individual names: later chunks pair each p<i> with its residual plot.
p1 <- fit_plots[[1]]
p2 <- fit_plots[[2]]
p3 <- fit_plots[[3]]
p4 <- fit_plots[[4]]
p5 <- fit_plots[[5]]
p6 <- fit_plots[[6]]
p7 <- fit_plots[[7]]

grid.arrange(p1, p2, p3, p4, p5, p6, p7, nrow = 3)

6.2 Compare values and Residuals to check for Linearity

6.2.1 log(rings) vs log length

# Log-log fit: both `rings` and `length` hold logged values at this point.
lm1 <- lm(rings ~ length, data = data)
lm1
## 
## Call:
## lm(formula = rings ~ length, data = data)
## 
## Coefficients:
## (Intercept)       length  
##      2.7872       0.7978
# Copy of the data with lm1's residuals and fitted values attached.
data1 <- data %>%
  mutate(
    resid1 = residuals(lm1),
    fitted1 = fitted(lm1)
  )

# Residuals against the logged predictor. The axis is labelled "llength" to
# match p1 and the other residual plots of this section.
q1 <- ggplot(data1, aes(x = length, y = resid1)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "llength", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

grid.arrange(p1, q1, nrow = 1)

6.2.2 log(rings) vs log diam

# Log-log fit: both `rings` and `diam` hold logged values at this point.
lm2 <- lm(rings ~ diam, data = data)
lm2
## 
## Call:
## lm(formula = rings ~ diam, data = data)
## 
## Coefficients:
## (Intercept)         diam  
##      2.9547       0.7592
# Copy of the data with lm2's residuals and fitted values attached.
data2 <- mutate(data, resid2 = residuals(lm2), fitted2 = fitted(lm2))

# Residuals against the logged predictor, with a zero line and a loess trend.
q2 <- ggplot(data2, aes(x = diam, y = resid2)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "ldiam", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p2) next to the residual plot (q2).
grid.arrange(p2, q2, nrow = 1)

6.2.3 log(rings) vs log height

# Log-log fit: both `rings` and `height` hold logged values at this point.
lm3 <- lm(rings ~ height, data = data)
lm3
## 
## Call:
## lm(formula = rings ~ height, data = data)
## 
## Coefficients:
## (Intercept)       height  
##      3.6369       0.6894
# Copy of the data with lm3's residuals and fitted values attached.
data3 <- mutate(data, resid3 = residuals(lm3), fitted3 = fitted(lm3))

# Residuals against the logged predictor, with a zero line and a loess trend.
q3 <- ggplot(data3, aes(x = height, y = resid3)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "lheight", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p3) next to the residual plot (q3).
grid.arrange(p3, q3, nrow = 1)

6.2.4 log(rings) vs log weight_whole

# Log-log fit: both `rings` and `weight_whole` hold logged values here.
lm4 <- lm(rings ~ weight_whole, data = data)
lm4
## 
## Call:
## lm(formula = rings ~ weight_whole, data = data)
## 
## Coefficients:
##  (Intercept)  weight_whole  
##       2.3630        0.2661
# Copy of the data with lm4's residuals and fitted values attached.
data4 <- mutate(data, resid4 = residuals(lm4), fitted4 = fitted(lm4))

# Residuals against the logged predictor, with a zero line and a loess trend.
q4 <- ggplot(data4, aes(x = weight_whole, y = resid4)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "lweight_whole", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p4) next to the residual plot (q4).
grid.arrange(p4, q4, nrow = 1)

6.2.5 log(rings) vs log weight_shucked

# Log-log fit: both `rings` and `weight_shucked` hold logged values here.
lm5 <- lm(rings ~ weight_shucked, data = data)
lm5
## 
## Call:
## lm(formula = rings ~ weight_shucked, data = data)
## 
## Coefficients:
##    (Intercept)  weight_shucked  
##         2.5471          0.2334
# Copy of the data with lm5's residuals and fitted values attached.
data5 <- mutate(data, resid5 = residuals(lm5), fitted5 = fitted(lm5))

# Residuals against the logged predictor, with a zero line and a loess trend.
q5 <- ggplot(data5, aes(x = weight_shucked, y = resid5)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "lweight_shucked", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p5) next to the residual plot (q5).
grid.arrange(p5, q5, nrow = 1)

6.2.6 log(rings) vs log weight_viscera

# Log-log fit: both `rings` and `weight_viscera` hold logged values here.
lm6 <- lm(rings ~ weight_viscera, data = data)
lm6
## 
## Call:
## lm(formula = rings ~ weight_viscera, data = data)
## 
## Coefficients:
##    (Intercept)  weight_viscera  
##         2.7506          0.2558
# Copy of the data with lm6's residuals and fitted values attached.
data6 <- mutate(data, resid6 = residuals(lm6), fitted6 = fitted(lm6))

# Residuals against the logged predictor, with a zero line and a loess trend.
q6 <- ggplot(data6, aes(x = weight_viscera, y = resid6)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "lweight_viscera", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p6) next to the residual plot (q6).
grid.arrange(p6, q6, nrow = 1)

6.2.7 log(rings) vs log weight_shell

# Log-log fit: both `rings` and `weight_shell` hold logged values here.
lm7 <- lm(rings ~ weight_shell, data = data)
lm7
## 
## Call:
## lm(formula = rings ~ weight_shell, data = data)
## 
## Coefficients:
##  (Intercept)  weight_shell  
##       2.7331        0.2914
# Copy of the data with lm7's residuals and fitted values attached.
data7 <- mutate(data, resid7 = residuals(lm7), fitted7 = fitted(lm7))

# Residuals against the logged predictor, with a zero line and a loess trend.
q7 <- ggplot(data7, aes(x = weight_shell, y = resid7)) +
  geom_point(size = 3) +
  theme_classic(base_size = 5) +
  labs(x = "lweight_shell", y = "Residual") +
  geom_hline(yintercept = 0, col = "red") +
  geom_smooth(method = "loess", se = FALSE)

# Scatter-with-fit (p7) next to the residual plot (q7).
grid.arrange(p7, q7, nrow = 1)

6.3 Check normality

# Diagnostic plots 1 and 2 for each log-log fit from section 6.2 (lm1..lm7).
# The commented-out blocks are a manual alternative that draws the Q-Q plot
# from the residual columns with geom_qq() / geom_qq_line().

# data1 %>% ggplot() +
#   aes(sample = resid1) +
#   geom_qq(size = 2) + geom_qq_line()

library(ggfortify)
autoplot(lm1, which = 1:2)

# data2 %>% ggplot() +
#   aes(sample = resid2) +
#   geom_qq(size = 2) + geom_qq_line()

autoplot(lm2, which = 1:2)

# data3 %>% ggplot() +
#   aes(sample = resid3) +
#   geom_qq(size = 2) + geom_qq_line()
autoplot(lm3, which = 1:2)

# data4 %>% ggplot() +
#   aes(sample = resid4) +
#   geom_qq(size = 2) + geom_qq_line()
autoplot(lm4, which = 1:2)

# data5 %>% ggplot() +
#   aes(sample = resid5) +
#   geom_qq(size = 2) + geom_qq_line()
autoplot(lm5, which = 1:2)

# data6 %>% ggplot() +
#   aes(sample = resid6) +
#   geom_qq(size = 2) + geom_qq_line()
autoplot(lm6, which = 1:2)

autoplot(lm7, which = 1:2)

6.4 Use GGally

# install.packages("GGally")
library(GGally)
# Panel function for the lower triangle of ggpairs(): points plus a red loess
# curve without a confidence band. `...` is accepted but not used.
# (Swap in `method = lm` with blue colours to draw a straight line instead.)
my_fn <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) +
    geom_point() +
    geom_smooth(method = loess, fill = "red", color = "red", se = FALSE)
}

# Pairs plot of the first nine columns, using my_fn for continuous pairs.
pairs_plot <- GGally::ggpairs(
  data,
  columns = 1:9,
  lower = list(continuous = my_fn)
)
pairs_plot + theme_bw(base_size = 10)

# Multiple regression for the log-log section: log(rings) on sex and the seven
# logged measurements. `rings` already holds log(rings) here, so it is
# identical to `lorings`; with lorings left in, the fit was the trivial
# rings = 1 * lorings and summary() warned "essentially perfect fit".
lmTotal <- lm(rings ~ . - lorings, data = data)
summary(lmTotal)$coefficients %>% round(4)
## NOTE: output removed - the table and equation printed here came from the
## degenerate fit that included lorings. Re-knit to regenerate them.
lmTotal
# Fitted model as a LaTeX equation with the estimated coefficients.
extract_eq(lmTotal, use_coefs = TRUE)